# LOAD REPLICATION DATA ("anonymized") ####
# See paper for a description of variables
# Load survey data ####
load("data_replication.RDATA")

# Load packages
library(pacman)
p_load(tidyverse, data.table,scales,car,
       gridExtra,ggfittext, ggpubr,patchwork,
       gt, modelsummary, webshot2, nplyr)

names(data)
# [1] "id_survey" "answering_person" "twitter" "who_tweets" "twitter_actual_user"
# [6] "how_many_tweet" "party" "party_guidelines" "control_twitter"




####  Figure 1: Account usage #### 

# Plot 1: Who tweets?

# Count frequencies of each category: 
# Tabulate who operates each account and attach a normal-approximation
# 95% confidence interval (in percentage points) to every category.
data_who_tweets <- data %>%
  # Refer to the piped column directly instead of `data$id_survey`, so the
  # condition always applies to the rows flowing through the pipe.
  filter(!is.na(id_survey)) %>%
  select(who_tweets) %>%
  drop_na("who_tweets") %>%
  # .drop = FALSE: if who_tweets is a factor, unused levels are kept with n = 0.
  count(who_tweets, .drop = FALSE) %>%
  mutate(
    Pct = round(n / sum(n) * 100, digits = 0), # share in %, whole numbers
    sample_size = sum(n),
    # NOTE(review): the proportion is derived from the rounded Pct, so the
    # SE below is based on the rounded share, not the exact one.
    p = Pct / 100,
    q = 1 - p,
    se = sqrt(p * q / sample_size),
    ci_95 = se * 1.96,
    ci_95_Pct = ci_95 * 100,
    ci_95_Pct_low = Pct - ci_95_Pct,
    ci_95_Pct_high = Pct + ci_95_Pct
  )


# Bar chart of the share of accounts per operator category, with 95% CI
# whiskers and a "<pct>%\n(<n>)" label on every bar.
plot_who_tweets <- ggplot(
  data = data_who_tweets,
  mapping = aes(x = who_tweets, y = Pct)
) +
  geom_col() +
  geom_errorbar(
    mapping = aes(ymin = ci_95_Pct_low, ymax = ci_95_Pct_high),
    color = "darkgray",
    width = .2
  ) +
  # Bars with n < 15 get a black label above the bar top; all other bars get
  # a white label just below the bar top.
  geom_text(
    mapping = aes(
      y = Pct,
      label = paste0(Pct, "%", "\n(", n, ")"),
      vjust = ifelse(n < 15, -0.5, 1.1)
    ),
    color = ifelse(data_who_tweets$n < 15, "black", "white"),
    size = 4
  ) +
  # Wrap long category labels onto several lines.
  scale_x_discrete(labels = function(x) str_wrap(x, width = 10)) +
  # Append a % sign to the y-axis tick labels.
  scale_y_continuous(labels = function(x) paste0(x, "%")) +
  labs(
    x = "",
    y = "% of accounts",
    title = "Plot 1: Who uses the account"
  ) +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5))

# Plot 2: How many persons are using account X?
# Categories (1-7 persons) used both as factor levels here and as x-axis
# labels in plot_how_many; listing all of them keeps categories without
# observations in the table and on the axis.
x.labels <- c("1", "2", "3", "4", "5", "6", "7")

# Number of persons operating each account, tabulated with percentages and a
# normal-approximation 95% confidence interval (in percentage points).
data_how_many <- data %>%
  # Refer to the piped column directly instead of `data$id_survey`.
  filter(!is.na(id_survey)) %>%
  select(twitter_actual_user, how_many_tweet) %>%
  # Start from how_many_tweet and set the value to 1 (one person) wherever
  # twitter_actual_user == 1. replace() performs the same subscript
  # assignment as `x[cond] <- 1`, so rows where the condition is NA keep
  # their how_many_tweet value.
  mutate(
    number_twitter_users = replace(how_many_tweet, twitter_actual_user == 1, 1)
  ) %>%
  drop_na("number_twitter_users") %>%
  select(number_twitter_users) %>%
  # nrow() at this point: 80 (as noted by the original author)
  # A single factor() call with explicit levels replaces the former
  # as.factor() + factor() pair.
  mutate(number_twitter_users = factor(number_twitter_users, levels = x.labels)) %>%
  # .drop = FALSE keeps categories with zero observations (n = 0).
  count(number_twitter_users, .drop = FALSE) %>%
  mutate(
    Pct = round(n / sum(n) * 100, digits = 0), # share in %, whole numbers
    sample_size = sum(n),
    # NOTE(review): the proportion is derived from the rounded Pct.
    p = Pct / 100,
    q = 1 - p,
    se = sqrt(p * q / sample_size),
    ci_95 = se * 1.96,
    ci_95_Pct = ci_95 * 100,
    ci_95_Pct_low = Pct - ci_95_Pct,
    ci_95_Pct_high = Pct + ci_95_Pct,
    # A negative lower bound is not meaningful for a percentage: clip at 0.
    ci_95_Pct_low = ifelse(ci_95_Pct_low <= 0, 0, ci_95_Pct_low)
  )


# Bar chart of the share of accounts by number of operating persons (1-7),
# with 95% CI whiskers and a "<pct>%\n(<n>)" label per bar.
plot_how_many <- ggplot(data = data_how_many, aes(x = number_twitter_users, y = Pct)) +
  geom_bar(stat = "identity") +
  geom_errorbar(aes(ymin = ci_95_Pct_low, ymax = ci_95_Pct_high),
                width = .2,
                color = "darkgray") +
  # NOTE(review): labels are anchored at the count `n` while the bars are
  # drawn at `Pct`; the stepped vjust values look hand-tuned for that anchor.
  # Plot 1 anchors its labels at Pct -- confirm that `y = n` is intended.
  geom_text(aes(
    label = paste0(Pct, "%", "\n(", n, ")"),
    y = n,
    vjust = ifelse(n < 5, -0.2, ifelse(n < 20, 1.5, ifelse(n < 30, 1.8,
                                                           ifelse(n < 40, 2.1, 2.25))))),
    # Black text for bars with n < 5, white text otherwise.
    color = ifelse(data_how_many$n < 5, "black", "white"),
    size = 4
  ) +
  theme_minimal(base_size = 14) +
  ylab("% of accounts") +
  xlab("") +
  # drop = FALSE keeps every category on the axis, including those without
  # observations. Spelled out as FALSE because `F` can be reassigned.
  scale_x_discrete(drop = FALSE, labels = x.labels) +
  scale_y_continuous(labels = function(x) paste0(x, "%")) + # append % sign on y axis
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) + # horizontal, centred tick labels
  ggtitle("Plot 2: Number of persons operating \nthe account")

# Figure 1: place both account-usage panels side by side (two columns) and
# attach a shared caption and title/subtitle text sizes.
figure1 <- plot_who_tweets +
  plot_how_many +
  plot_layout(ncol = 2) +
  plot_annotation(
    # title = 'Figure 1: Account usage',
    caption = 'Note: Data comprises a non-random sample collected from 709 members of the German Bundestag in 2020. Gray bars represent 95% confidence intervals.',
    theme = theme(
      plot.title = element_text(size = 20),
      plot.subtitle = element_text(size = 16)
    )
  )

# Write the figure to disk as a 10 x 4 inch PNG at 300 dpi.
ggsave(
  filename = "fig_1_account_usage.png",
  plot = figure1,
  units = "in",
  width = 10,
  height = 4,
  dpi = 300
)

# Display the figure in the active graphics device.
figure1











#### Figure 2: Are there party guidelines on Twitter? ####

# Create new df with the selected variables:
# Answers to the party-guidelines question, counted per party among
# respondents with twitter == 2 (the Twitter-account filter used throughout
# this script).
data_guidelines <- data %>%
  # Refer to the piped column directly instead of `data$id_survey`.
  filter(!is.na(id_survey), twitter == 2) %>%
  select(party, party_guidelines) %>%
  drop_na("party") %>%
  mutate(party_guidelines = as.factor(party_guidelines)) %>%
  group_by(party) %>%
  # .drop = FALSE keeps answer levels without observations (n = 0) per party.
  count(party_guidelines, .drop = FALSE)

# Number of respondents (among politicians) that have a Twitter account,
# attached to every respondent row as resp_partymembers_n.
data2 <- data %>%
  filter(!is.na(id_survey), twitter == 2) %>%
  group_by(party) %>%
  mutate(resp_partymembers_n = n())

# One row per party with its respondent count.
df_resp_partymembers_n <- data2 %>%
  group_by(party) %>%
  select("party", "resp_partymembers_n") %>%
  slice(1)

data_guidelines <- data_guidelines %>%
  # left_join() already keeps every row of the left table. The former
  # `all.x = TRUE` is an argument of base::merge(), not of left_join(), and
  # has been removed.
  left_join(df_resp_partymembers_n, by = "party") %>%
  # A grouping column cannot be modified inside a grouped mutate(), so
  # ungroup before recoding `party`.
  ungroup() %>%
  mutate(
    # Share of each answer among the party's respondents, in whole percent.
    Pct = round(n / resp_partymembers_n * 100, digits = 0),
    # Fixed party order for the plot (facets and fill colours).
    party = factor(party,
                   levels = c("CDU/CSU", "SPD", "AfD", "FDP", "The Left", "Greens"))
  ) %>%
  # Group explicitly so that sum(n) below is the per-party sample size.
  group_by(party) %>%
  mutate(
    sample_size = sum(n),
    # NOTE(review): the proportion is derived from the rounded Pct.
    p = Pct / 100,
    q = 1 - p,
    se = sqrt(p * q / sample_size),
    ci_95 = se * 1.96,
    ci_95_Pct = ci_95 * 100,
    ci_95_Pct_low = Pct - ci_95_Pct,
    ci_95_Pct_high = Pct + ci_95_Pct,
    # Clip the interval to the valid percentage range [0, 100].
    ci_95_Pct_low = ifelse(ci_95_Pct_low <= 0, 0, ci_95_Pct_low),
    ci_95_Pct_high = ifelse(ci_95_Pct_high >= 100, 100, ci_95_Pct_high)
  ) %>%
  # Fixed answer order on the x axis.
  mutate(party_guidelines = factor(party_guidelines,
                                   levels = c("No", "Do not know", "Yes"),
                                   ordered = TRUE))




# One facet per party: share of each answer to the guidelines question, with
# 95% CI whiskers and a "<pct>%\n(<n>)" label per bar.
plot_guidelines <- ggplot(
  data = data_guidelines,
  mapping = aes(x = party_guidelines, y = Pct, fill = party)
) +
  geom_col(
    position = position_dodge(0.9),
    width = 0.85
  ) +
  geom_errorbar(
    mapping = aes(ymin = ci_95_Pct_low, ymax = ci_95_Pct_high),
    position = position_dodge(0.9),
    color = "darkgray",
    width = .2
  ) +
  # Bars below 30% get their label above the bar; black text is used there
  # and for all FDP bars, white text everywhere else.
  geom_text(
    mapping = aes(label = paste0(Pct, "%\n(", n, ")")),
    color = ifelse(
      data_guidelines$Pct < 30 | (data_guidelines$party == "FDP"),
      "black",
      "white"
    ),
    vjust = ifelse(data_guidelines$Pct < 30, -0.2, 1.2),
    size = 4
  ) +
  facet_wrap(~party) +
  # Wrap long answer labels onto several lines.
  scale_x_discrete(labels = function(x) str_wrap(x, width = 7)) +
  # Append a % sign to the y-axis tick labels.
  scale_y_continuous(labels = function(x) paste0(x, "%")) +
  # Fill colours, assigned in the order of the `party` factor levels.
  scale_fill_manual(
    values = c("#000000", "#EB001F", "#009EE0", "#FFED00", "#8C3473", "#64A12D")
  ) +
  labs(
    title = "",
    x = "",
    y = "% of respondents",
    fill = "Party"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    legend.position = "none",
    legend.key.size = unit(0.6, "cm")
  )



# Figure 2: wrap the guidelines plot in a one-column layout and add the
# question wording as subtitle plus a caption.
figure2 <- plot_guidelines +
  plot_layout(ncol = 1) +
  plot_annotation(
    # title = "Figure 2: Party guidelines",
    subtitle = "Does your party have rules or guidelines for dealing with Twitter?",
    caption = 'Note: Data comprises a non-random sample collected from 709 members of the German Bundestag in 2020. Gray bars represent 95% confidence intervals.',
    theme = theme(
      plot.title = element_text(size = 20),
      plot.subtitle = element_text(size = 16)
    )
  )

# Write the figure to disk as a 10 x 4.5 inch PNG at 300 dpi.
ggsave(
  filename = "fig_2_guidelines.png",
  plot = figure2,
  units = "in",
  width = 10,
  height = 4.5,
  dpi = 300
)

# Display the figure in the active graphics device.
figure2














#### Are tweets posted by a central office in the party? #### 

# Create new df with the selected variables: 
# Answers to control_twitter, counted per party among respondents with
# twitter == 2 (the Twitter-account filter used throughout this script).
df_vars <- data %>%
  # Refer to the piped column directly instead of `data$id_survey`.
  filter(!is.na(id_survey), twitter == 2) %>%
  select(party, control_twitter) %>%
  drop_na("party") %>%
  mutate(
    party = as.factor(party),
    control_twitter = as.factor(control_twitter)
  ) %>%
  group_by(party) %>%
  # .drop = FALSE keeps party/answer combinations without observations.
  count(control_twitter, .drop = FALSE)

# Number of respondents per party who have a Twitter account, attached to
# every respondent row as resp_partymembers_n.
data2 <- data %>%
  filter(!is.na(id_survey), twitter == 2) %>%
  group_by(party) %>%
  mutate(resp_partymembers_n = n())

# One row per party with its respondent count.
df_resp_partymembers_n <- data2 %>%
  group_by(party) %>%
  select("party", "resp_partymembers_n") %>%
  slice(1)

df_vars <- df_vars %>%
  # left_join() already keeps every row of the left table. The former
  # `all.x = TRUE` is an argument of base::merge(), not of left_join(), and
  # has been removed, as have the no-op `rename(party = party)` calls.
  left_join(df_resp_partymembers_n, by = "party") %>%
  # Share of each answer among the party's respondents, in whole percent.
  mutate(Pct = round(n / resp_partymembers_n * 100, digits = 0)) %>%
  arrange(control_twitter, party)

# Print all rows instead of the truncated default tibble preview.
print(df_vars, n = nrow(df_vars))






